ggplot(all, aes(x=date, y=diff_n_cont, color=genet)) +
geom_point() +
facet_wrap(~treatment) +
scale_color_manual(values = c("A" = "#9370DB", "B" = "#C21B78", "C" = "#FF9933", "D" ="#FF3333", "E" = "#662B45")) +
theme_minimal()
ggplot(all, aes(x=date, y=growth_rate_cont, color=genet)) +
facet_wrap(~treatment,
labeller = labeller(treatment = c("cold" = "Ambient", "severe" = "Severe MHW", "extreme" = "Extreme MHW"))) +
geom_point(alpha=0.5) +
geom_smooth(method = "lm", formula = y ~ x, se = F) +
stat_poly_eq(use_label("eq"), formula = y ~ x, na.rm=TRUE) +
scale_color_manual(values = c("A" = "#9370DB", "B" = "#C21B78", "C" = "#FF9933", "D" ="#FF3333", "E" = "#662B45")) +
theme_minimal() +
labs(x = "Date",
y = "Cumulative population growth (no. polyps)",
color = "Genet")
#ggsave(here("experiment", "figures", "growth_genet.png"), width=12, height=7)
ggplot(all, aes(x=date, y=growth_rate, color = treatment)) +
geom_point(alpha=0.1) +
facet_wrap(~mhw, scales="free", labeller = labeller(mhw = c("during" = "During MHW", "after" = "After MHW"))) +
geom_smooth(method = "lm", formula = y ~ x, se = F) +
stat_poly_eq(use_label("eq"), formula = y ~ x, parse = TRUE, label.x.npc = "left") +
scale_color_manual(values = c("cold" = "#0072B2", "severe" = "#E69F00", "extreme" = "#D55E00"),
labels = c("cold" = "Ambient", "severe" = "Severe MHW", "extreme" = "Extreme MHW")) +
theme_minimal() +
coord_cartesian(ylim = c(-5, 25), expand=TRUE) + #set equal y axes coordinates
labs(x = "Date",
y = "Cumulative population growth (no. polyps)",
color = "Treatment")
#ggsave(here("experiment", "figures", "growth_MHW.png"), width=12, height=7)
ggplot(all, aes(x=date, y=growth_rate, color = genet)) +
geom_point(alpha=0.1) +
facet_grid(mhw~treatment,
#switch = 'x',
scales = "free_x",
labeller = labeller(mhw = c("during" = "During MHW", "after" = "After MHW"),
treatment = c("cold" = "Ambient", "severe" = "Severe MHW", "extreme" = "Extreme MHW"))) +
geom_smooth(method = "lm", formula = y ~ x, se = F) +
stat_poly_eq(use_label("eq"), formula = y ~ x, parse = TRUE, label.x.npc = "left") +
scale_color_manual(values = c("A" = "#9370DB", "B" = "#C21B78", "C" = "#FF9933", "D" ="#FF3333", "E" = "#662B45")) +
theme_minimal() +
#coord_cartesian(ylim = c(-5, 25), expand=TRUE) + #set equal y axes coordinates
labs(x = "Date",
y = "Cumulative population growth (no. polyps)",
color = "Genet")
theme(strip.placement = "outside")
## List of 1
## $ strip.placement: chr "outside"
## - attr(*, "class")= chr [1:2] "theme" "gg"
## - attr(*, "complete")= logi FALSE
## - attr(*, "validate")= logi TRUE
#ggsave(here("experiment", "figures", "growth_MHW_genet.png"), width=12, height=10)
label_plot <- c("0.13", "0.09", "0.071", "0.0522", "0.0761",
"0.151", "0.0933", "0.0252", "0.116", "0.0413", #extreme during
"0.166", "0.142", "0.0522", "0.134", "0.0902",
"-0.0023", "0.00741", "0.026", "0.00843", "-0.0131",
"0.0102", "0.00652", "0.00881", "0.00421", "-0.0117", #extreme after
"-0.0104", "-0.000895", "0.0217", "0.0127", "-0.0131")
color_plot <- rep(c("#9370DB", "#C21B78", "#FF9933", "#FF3333", "#662B45"), times = 6)
treatment_plot <- rep(c(rep("cold", 5), rep("extreme", 5), rep("severe", 5)), times = 2)
date_plot <- c(rep("2023-10-01", 15), rep("2024-01-10", 15))
p <- ggplot(all, aes(x=date, y=growth_rate_cont, color=genet, shape=mhw)) +
geom_point(alpha=0.15) +
facet_wrap(~factor(treatment, levels=c("cold", "severe", "extreme"), labels=c("Ambient", "Severe MHW", "Extreme MHW"))) + geom_rect(aes(xmin = as.POSIXct("2023-11-30"), xmax = as.POSIXct("2023-12-10"), ymin = -5, ymax = -4.5), color = "dimgray", fill = "dimgray", alpha = 0.4) + # Add horizontal bar
geom_rect(aes(xmin = as.POSIXct("2024-02-14"), xmax = as.POSIXct("2024-02-21"), ymin = -5, ymax = -4.5), color = "dimgray", fill = "dimgray", alpha = 0.4) + # Add horizontal bar
geom_vline(xintercept = as.POSIXct("2023-12-10"), linetype = "dashed") + # Add vertical line delineating during/after MHW
geom_smooth(method = "lm", se = FALSE) +
#stat_poly_eq(use_label("eq"), formula = y ~ x, parse = TRUE) + #un-comment this to get updated values to add to label_plot
scale_color_manual(values = c("A" = "#9370DB", "B" = "#C21B78", "C" = "#FF9933", "D" ="#FF3333", "E" = "#662B45")) +
scale_shape_manual(values = c("during" = 16, "after" = 17)) + # Define shapes for mhw levels
theme_minimal() +
coord_cartesian(ylim = c(-5, 25), expand=TRUE) +
labs(x = "Date",
y = "Cumulative population growth (no. polyps)",
color = "Genet") +
guides(shape = "none")
loop_count = 0
for (i in 1:length(label_plot)) {
p <- p + geom_text(data = data.frame(date = as.Date("2023-12-30"), treatment = treatment_plot[i], mhw = "during"),
label=paste0("m = ", label_plot[i]), x = as.Date(date_plot[i]), y = 25-loop_count, hjust = 0, vjust = 1, color=color_plot[i])
loop_count <- loop_count + 1
if (loop_count > 4) {
loop_count <- 0 # Reset counter after every 5th iteration
}
}
p
#ggsave(here("experiment", "figures", "growth_MHW_genet_combined.png"), width=12, height=7)
g <- ggplot(all, aes(x=date, y=n_true, color=genet, shape=mhw)) +
geom_point(alpha=0.15) +
facet_wrap(~factor(treatment, levels=c("cold", "severe", "extreme"), labels=c("Ambient", "Severe MHW", "Extreme MHW"))) +
geom_rect(aes(xmin = as.POSIXct("2023-11-30"), xmax = as.POSIXct("2023-12-10"), ymin = 5, ymax = 5.5), color = "dimgray", fill = "dimgray", alpha = 0.2) + # Add horizontal bar
geom_rect(aes(xmin = as.POSIXct("2024-02-14"), xmax = as.POSIXct("2024-02-21"), ymin = 5, ymax = 5.5), color = "dimgray", fill = "dimgray", alpha = 0.2) + # Add horizontal bar
geom_vline(xintercept = as.POSIXct("2023-12-10"), linetype = "dashed", linewidth = 0.5) + # Add vertical line delineating during/after MHW
geom_smooth(method = "lm", se = FALSE) +
# stat_poly_eq(use_label("eq"), formula = y ~ x, parse = TRUE) +
scale_color_manual(values = c("A" = "#9370DB", "B" = "#C21B78", "C" = "#FF9933", "D" ="#FF3333", "E" = "#662B45")) +
scale_shape_manual(values = c("during" = 16, "after" = 17)) + # Define shapes for mhw levels
theme_minimal() +
#coord_cartesian(ylim = c(5, 40), expand=TRUE) +
labs(x = "Date",
y = "Number of polyps",
color = "Genet") +
guides(shape = "none")
loop_count = 0
for (i in 1:length(label_plot)) {
g <- g + geom_text(data = data.frame(date = as.Date("2023-12-30"), treatment = treatment_plot[i], mhw = "during"),
label=paste0("m = ", label_plot[i]), x = as.Date(date_plot[i]), y = 40-loop_count, hjust = 0, vjust = 1, color=color_plot[i])
loop_count <- loop_count + 1
if (loop_count > 4) {
loop_count <- 0 # Reset counter after every 5th iteration
}
}
g
#ggsave(here("experiment", "figures", "n_MHW_genet_combined.png"), width=12, height=7)
# %closed facet grid
ggplot(all, aes(x=genet, y=percent_closed, fill=genet, group=genet)) +
geom_point(aes(color=genet)) +
geom_violin(position="dodge", alpha=0.5, outlier.colour="transparent") +
facet_grid(mhw~treatment)
# %closed facet wrap
ggplot(all, aes(x = genet, y = percent_closed, fill = genet, group = interaction(genet, mhw), shape = mhw)) +
geom_point(aes(color = genet), position = position_dodge(width = 0.9)) +
geom_violin(aes(size = mhw), position = position_dodge(width = 0.9), alpha = 0.5, outlier.colour = "transparent") +
facet_wrap(~treatment) +
scale_size_manual(values = c("during" = 1, "after" = 0.5)) + # Define sizes for mhw levels
scale_shape_manual(values = c("during" = 16, "after" = 17)) # Define shapes for mhw levels
# %fullyOpen facet wrap
ggplot(all, aes(x = genet, y = percent_fully_open, fill = genet, group = interaction(genet, mhw), shape = mhw)) +
geom_point(aes(color = genet), position = position_dodge(width = 0.9), alpha = 0.5) +
geom_violin(aes(size = mhw), position = position_dodge(width = 0.9), alpha = 0.3, color = alpha("black", 0.75)) +
facet_wrap(~treatment,
labeller = labeller(treatment = c("cold" = "Ambient", "severe" = "Severe MHW", "extreme" = "Extreme MHW"))) +
scale_size_manual(values = c("during" = 1, "after" = 0.5), labels = c("During MHW", "After MHW")) + # Define sizes for mhw levels
scale_shape_manual(values = c("during" = 16, "after" = 17), labels = c("During MHW", "After MHW")) + # Define shapes for mhw levels
# change color of violin
scale_fill_manual(values = c("A" = "#9370DB", "B" = "#C21B78", "C" = "#FF9933", "D" ="#FF3333", "E" = "#662B45")) +
scale_color_manual(values = c("A" = "#9370DB", "B" = "#C21B78", "C" = "#FF9933", "D" ="#FF3333", "E" = "#662B45")) +
theme_minimal() +
labs(x = "Genet",
y = "% open polyps",
color = "Genet",
shape = "MHW",
size = "MHW") +
guides(fill = "none",
color = "none")
#ggsave(here("experiment", "figures", "open_MHW_genet.png"), width=12, height=7)
ggplot(all, aes(x=date, y=total_biomass)) +
geom_point(aes(color=genet), size=0.5) +
geom_smooth(aes(fill=genet, color=genet), method="lm", se=T) +
facet_wrap(~treatment,
labeller = labeller(treatment = c("cold" = "Ambient", "severe" = "Severe MHW", "extreme" = "Extreme MHW"))) +
scale_fill_manual(values = c("A" = "#9370DB", "B" = "#C21B78", "C" = "#FF9933", "D" ="#FF3333", "E" = "#662B45")) +
scale_color_manual(values = c("A" = "#9370DB", "B" = "#C21B78", "C" = "#FF9933", "D" ="#FF3333", "E" = "#662B45")) +
theme_minimal() +
labs(x = "Date",
y = "Total biomass (g)",
color = "Genet",
fill = "Genet")
#ggsave(here("experiment", "figures", "total_biomass_mhw.png"), width=12, height=7)
ggplot(all, aes(x=date, y=avg_size)) +
geom_point(aes(color=genet), size=0.5) +
geom_smooth(aes(fill=genet, color=genet), method="loess", se=T) +
facet_wrap(~treatment,
labeller = labeller(treatment = c("cold" = "Ambient", "severe" = "Severe MHW", "extreme" = "Extreme MHW"))) +
scale_fill_manual(values = c("A" = "#9370DB", "B" = "#C21B78", "C" = "#FF9933", "D" ="#FF3333", "E" = "#662B45")) +
scale_color_manual(values = c("A" = "#9370DB", "B" = "#C21B78", "C" = "#FF9933", "D" ="#FF3333", "E" = "#662B45")) +
theme_minimal() +
labs(x = "Date",
y = "Average body size (mm)",
color = "Genet",
fill = "Genet")
#ggsave(here("experiment", "figures", "avg_size_mhw.png"), width=12, height=7)
ggplot(all, aes(x=date, y=dying_dead, color = treatment)) +
geom_point(alpha=0.1) +
facet_wrap(~treatment,
labeller = labeller(treatment = c("cold" = "Ambient", "severe" = "Severe MHW", "extreme" = "Extreme MHW"))) +
geom_smooth(method = "lm", formula = y ~ x, se = F) +
scale_color_manual(values = c("cold" = "#0072B2", "severe" = "#E69F00", "extreme" = "#D55E00")) +
theme_minimal() +
scale_y_continuous(breaks = seq(0, 10, 2), limits = c(0, 10), labels = function(x) format(x, nsmall = 0)) +
labs(x = "Date",
y = "Mortality (no. polyps)",
color = "Treatment") +
guides(color = "none")
#ggsave(here("experiment", "figures", "mortality_MHW.png"), width=12, height=7)
all_size <- all %>%
filter(!is.na(avg_size)) %>%
mutate(avg_size_log = log(avg_size))
# scatter plot of transformed data
ggplot(all_size, aes(x=date, y=avg_size_log)) +
geom_point(aes(color=treatment)) +
labs(x = "Average body size (mm)",
y = "Frequency")
# histogram of transformed data
ggplot(all_size, aes(x=avg_size_log)) +
geom_histogram(binwidth=0.1) +
labs(x = "log(Average body size (mm))",
y = "Frequency")
# transformed data density plot
ggplot(all_size, aes(x = avg_size_log)) +
geom_density() +
ggtitle("Density Plot of Transformed Data")
# Q-Q plot of log-transformed data
qqnorm(all_size$avg_size_log)
qqline(all_size$avg_size_log, col = "red")
It’s not perfectly normal but definitely better log-transformed. Looks a
bit like exponential decay, so let’s make the data stationary by
differencing.
all_size <- all_size %>%
group_by(tank, genet) %>%
mutate(avg_size_diff = avg_size_log - lag(avg_size_log),
avg_size_diff2 = avg_size_diff - lag(avg_size_diff)) #need second order differencing for slope=0
ggplot(all_size, aes(x=date)) +
geom_point(aes(y=avg_size_diff), color="black", alpha=0.3) +
geom_point(aes(y=avg_size_diff2), color="blue", alpha=0.3) +
geom_smooth(aes(y=avg_size_diff), method="lm", color="black") +
geom_smooth(aes(y=avg_size_diff2), method="lm", color="blue") +
labs(x = "Average body size (mm)",
y = "Frequency")
# Fit linear models and extract coefficients
models <- all_size %>%
group_by(treatment, genet, mhw) %>%
do(tidy(lm(avg_size_diff ~ date, data = .))) %>%
select(treatment, genet, term, estimate) %>%
pivot_wider(names_from = term, values_from = estimate) %>%
mutate(equation = sprintf("y = %.2f + %.2e * x", `(Intercept)`, date)) %>%
rename(slope = date) %>%
ungroup() %>%
group_by(mhw) %>%
mutate(x_axis = as.POSIXct(ifelse(mhw == "during", "2023-10-15", "2024-01-15")),
y_axis = rep(seq(from = 0.2, to = 0.5, length.out = 5), length.out = n())) %>%
ungroup() %>%
select(-mhw)
# Join regression equations with the original data
all_size_reg <- all_size %>%
left_join(models, by = c("treatment", "genet"), relationship = "many-to-many")
ggplot(all_size_reg) +
geom_point(aes(x=date, y=avg_size_diff, color=genet), alpha=0.3) +
geom_smooth(aes(x=date, y=avg_size_diff, color=genet ,linetype = mhw), method="lm") +
facet_wrap(~treatment) +
geom_label_repel(data = models, aes(x=x_axis, y=y_axis, label = equation, color = genet), size = 3) +
labs(x = "Date",
y = "Differenced average body size (mm)") +
theme_bw()
#ggsave(here("experiment", "figures", "body_size_regression.png"), width=15, height=7)
# List to store results
adf_results <- list()
# Loop through each group
for (group in unique(all_size$treatment)) {
group_data <- all_size %>%
filter(treatment == group,
!is.na(avg_size_diff2)) %>%
pull(avg_size_diff2) #chnage this to avg_size_diff for first differenced data
# Perform the ADF test
adf_test <- adf.test(group_data, alternative = "stationary")
# Store the result
adf_results[[group]] <- adf_test
}
# Print results
adf_results
## $cold
##
## Augmented Dickey-Fuller Test
##
## data: group_data
## Dickey-Fuller = -7.5935, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $severe
##
## Augmented Dickey-Fuller Test
##
## data: group_data
## Dickey-Fuller = -5.3592, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
##
##
## $extreme
##
## Augmented Dickey-Fuller Test
##
## data: group_data
## Dickey-Fuller = -4.415, Lag order = 7, p-value = 0.01
## alternative hypothesis: stationary
# List to store results
kpss_results <- list()
# Loop through each group
for (group in unique(all_size$treatment)) {
group_data <- all_size %>%
filter(treatment == group,
!is.na(avg_size_diff2)) %>%
pull(avg_size_diff2) #chnage this to avg_size_diff for first differenced data
# Perform the KPSS test
kpss_test <- ur.kpss(group_data, type = "mu") # Use "tau" if you expect trend stationarity
# Store the result
kpss_results[[group]] <- summary(kpss_test)
}
# Print results
kpss_results
## $cold
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 5 lags.
##
## Value of test-statistic is: 0.03
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
##
##
## $severe
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 5 lags.
##
## Value of test-statistic is: 0.0316
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
##
##
## $extreme
##
## #######################
## # KPSS Unit Root Test #
## #######################
##
## Test is of type: mu with 5 lags.
##
## Value of test-statistic is: 0.0844
##
## Critical value for a significance level of:
## 10pct 5pct 2.5pct 1pct
## critical values 0.347 0.463 0.574 0.739
Both p-value is less than 0.01 and test statistic is lower than critical values, so second-order differenced data is now almost certainly stationary. First-order differencing only showed strong evidence for stationarity with the ADF test.
Acf(all_size$avg_size_log, main = "ACF of Log-transformed Series")
Pacf(all_size$avg_size_log, main = "PACF of Log-transformed Series")
Acf(all_size$avg_size_diff, main = "ACF of Differenced Series")
Pacf(all_size$avg_size_diff, main = "PACF of Differenced Series")
Acf(all_size$avg_size_diff2, main = "ACF of 2-Differenced Series")
Pacf(all_size$avg_size_diff2, main = "PACF of 2-Differenced Series")
There is definitely a lot of autocorrelation with the data, for both
1-diff and 2-diff. Makes sense because body size should be highly
correlated over time.
all_size_na <- all_size %>%
filter(!is.na(avg_size_diff)) %>%
mutate(genet = as.numeric(genet),
treatment = as.numeric(treatment),
mhw = as.numeric(mhw)) %>%
select(date, tank, genet, n, treatment, mhw, avg_size, avg_size_log, avg_size_diff, avg_size_diff2)
autoArima <- auto.arima(all_size_na$avg_size_diff, seasonal = TRUE, stepwise = FALSE, approximation = FALSE)
summary(autoArima)
## Series: all_size_na$avg_size_diff
## ARIMA(1,1,2)
##
## Coefficients:
## ar1 ma1 ma2
## 0.7682 -1.5296 0.5354
## s.e. 0.0518 0.0662 0.0646
##
## sigma^2 = 0.002058: log likelihood = 2258.51
## AIC=-4509.02 AICc=-4508.99 BIC=-4488.19
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.001281871 0.04529954 0.03479757 Inf Inf 0.7851294 -0.009148644
I ran this all again with second-differenced data, and the model doesn’t appear significantly better (in some cases like BIC, not better) and so it doesn’t justify the added complexity of a second-differenced model. so let’s stick with first-differenced model.
residuals_auto <- residuals(autoArima)
plot(residuals_auto, main = "Residuals of AUTOSARIMA Model")
Acf(residuals_auto, main = "ACF of Residuals")
Box.test(residuals_auto, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: residuals_auto
## X-squared = 0.11324, df = 1, p-value = 0.7365
adf_test_residuals <- adf.test(residuals_auto, alternative = "stationary") #definitely stationary
qqnorm(residuals_auto)
qqline(residuals_auto)
shapiro.test(residuals_auto)
##
## Shapiro-Wilk normality test
##
## data: residuals_auto
## W = 0.99054, p-value = 1.217e-07
ggplot() +
geom_histogram(aes(x = residuals_auto), bins = 20, color = "black") +
labs(x = "Residuals",
y = "Frequency")
fitted_values <- fitted(autoArima)
plot(fitted_values, residuals_auto, main="Residuals vs Fitted Values")
abline(h=0, col="red")
2-order differencing is not significantly better than 1-order
differencing so let’s stick with 1-order.
AR(1) model: This is autoregressive data, and the current value of the time series depends linearly on its immediately preceding value - makes sense because body size shouldn’t change that drastically week by week. I(1): First-order differencing applied to make the series stationary - which makes sense because preliminary analyses showed slope of zero with second-differenced data. MA(2): current value of the time series depends on the last two periods’ forecast errors - means that the current value of the series depends on the errors made in the previous two periods, which doesn’t make a ton of intuitive sense except maybe if there are big changes to body size (i.e. reproduction?) that just means we won’t see those changes finish until 2 weeks later? <<< CHECK THIS LOGIC.
all_size_ts <- ts(all_size_na$avg_size_diff, frequency = 52) #weekly data as ts object
class(all_size_ts)
## [1] "ts"
summary(all_size_ts)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -0.325110 -0.060547 -0.027665 -0.030876 0.002888 0.168555
acf(all_size_ts)
pacf(all_size_ts)
decomposition <- stl(all_size_ts, s.window = "periodic")
plot(decomposition)
There’s no apparent seasonality and a slight positive trend in the
data
# Fit ARIMA with different seasonal orders
model1 <- Arima(all_size_na$avg_size_diff, order = c(1, 1, 1))
model2 <- Arima(all_size_na$avg_size_diff, order = c(1, 1, 2))
model3 <- Arima(all_size_na$avg_size_diff, order = c(1, 1, 3))
Acf(residuals(model1), main = paste("ACF of Residuals model1"))
Pacf(residuals(model1), main = paste("PACF of Residuals model1"))
Acf(residuals(model2), main = paste("ACF of Residuals model2"))
Pacf(residuals(model2), main = paste("PACF of Residuals model2"))
Acf(residuals(model3), main = paste("ACF of Residuals model3"))
Pacf(residuals(model3), main = paste("PACF of Residuals model3"))
Box.test(residuals(model1), lag = 20, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: residuals(model1)
## X-squared = 48.604, df = 20, p-value = 0.0003498
Box.test(residuals(model2), lag = 20, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: residuals(model2)
## X-squared = 30.952, df = 20, p-value = 0.05582
Box.test(residuals(model3), lag = 20, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: residuals(model3)
## X-squared = 30.904, df = 20, p-value = 0.05647
I(2) is definitely the best fit.
#create matrix of dummy variables incorporating all potential covariates
xreg_All <- model.matrix(~ treatment + genet + mhw - 1, data = all_size_na)
xreg_NoGenet <- model.matrix(~ treatment + mhw - 1, data = all_size_na)
xreg_MHW <- model.matrix(~ mhw - 1, data = all_size_na)
xreg_Treatment <- model.matrix(~ treatment - 1, data = all_size_na)
arima_covariates_all <- Arima(all_size_na$avg_size_diff, order = c(1, 1, 2), xreg = xreg_All)
summary(arima_covariates_all)
## Series: all_size_na$avg_size_diff
## Regression with ARIMA(1,1,2) errors
##
## Coefficients:
## ar1 ma1 ma2 treatment genet mhw
## 0.7657 -1.5291 0.5349 -0.0029 -3e-04 0.0011
## s.e. 0.0535 0.0676 0.0657 0.0025 8e-04 0.0154
##
## sigma^2 = 0.00206: log likelihood = 2259.25
## AIC=-4504.5 AICc=-4504.41 BIC=-4468.05
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.001340997 0.04527438 0.03478043 NaN Inf 0.7847427 -0.008252592
arima_covariates_noGenet <- Arima(all_size_na$avg_size_diff, order = c(1, 1, 2), xreg = xreg_NoGenet)
summary(arima_covariates_noGenet)
## Series: all_size_na$avg_size_diff
## Regression with ARIMA(1,1,2) errors
##
## Coefficients:
## ar1 ma1 ma2 treatment mhw
## 0.7661 -1.5293 0.5351 -0.0029 0.0012
## s.e. 0.0539 0.0682 0.0662 0.0025 0.0155
##
## sigma^2 = 0.002059: log likelihood = 2259.17
## AIC=-4506.35 AICc=-4506.28 BIC=-4475.1
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.001324029 0.04527702 0.03477904 NaN Inf 0.7847113 -0.008522662
arima_covariates_mhw <- Arima(all_size_na$avg_size_diff, order = c(1, 1, 2), xreg = xreg_MHW)
summary(arima_covariates_mhw)
## Series: all_size_na$avg_size_diff
## Regression with ARIMA(1,1,2) errors
##
## Coefficients:
## ar1 ma1 ma2 mhw
## 0.7750 -1.5384 0.5437 0.0039
## s.e. 0.0512 0.0657 0.0641 0.0149
##
## sigma^2 = 0.002059: log likelihood = 2258.54
## AIC=-4507.08 AICc=-4507.04 BIC=-4481.05
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.001260464 0.04529743 0.03478482 Inf Inf 0.7848417 -0.007205159
arima_covariates_treatment <- Arima(all_size_na$avg_size_diff, order = c(1, 1, 2), xreg = xreg_Treatment)
summary(arima_covariates_treatment)
## Series: all_size_na$avg_size_diff
## Regression with ARIMA(1,1,2) errors
##
## Coefficients:
## ar1 ma1 ma2 treatment
## 0.7651 -1.5282 0.5341 -0.0029
## s.e. 0.0520 0.0663 0.0646 0.0025
##
## sigma^2 = 0.002058: log likelihood = 2259.17
## AIC=-4508.34 AICc=-4508.3 BIC=-4482.3
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.00134849 0.04527726 0.03478006 NaN Inf 0.7847344 -0.008539237
Looks like genet is not a significant covariate, but treatment and during/after MHW are slightly impactful. <<< DISCUSS WITH CHRIS.
Auto-ARIMA with Covariates model (check AR, I, MA)
xreg_Covariate <- model.matrix(~ genet + mhw - 1, data = all_size_na)
autoArima_Covariate <- auto.arima(all_size_na$avg_size_diff, seasonal = TRUE, stepwise = FALSE, approximation = FALSE, xreg = xreg_Covariate)
summary(autoArima_Covariate)
## Series: all_size_na$avg_size_diff
## Regression with ARIMA(1,1,2) errors
##
## Coefficients:
## ar1 ma1 ma2 genet mhw
## 0.7673 -1.5283 0.5340 -3e-04 0.0016
## s.e. 0.0532 0.0675 0.0657 8e-04 0.0153
##
## sigma^2 = 0.002061: log likelihood = 2258.6
## AIC=-4505.2 AICc=-4505.14 BIC=-4473.96
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.001274559 0.04529606 0.03479419 Inf Inf 0.7850531 -0.009482209
No change to AR, I, or MA with any of the potential covariates.
So, for first-differenced data, here are the two model options #### Basic ARIMA(1,1,2): Coefficients: ar1 ma1 ma2 0.7682 -1.5296 0.5354 s.e. 0.0518 0.0662 0.0646
sigma^2 = 0.002058: log likelihood = 2258.51 AIC=-4509.02 AICc=-4508.99 BIC=-4488.19
ME RMSE MAE MPE MAPE MASE ACF1
Training set 0.001281871 0.04529954 0.03479757 Inf Inf 0.7851294 -0.009148644
ARIMA(1,1,2) with MHW/Treatment covariates: Coefficients: ar1 ma1 ma2 treatment mhw 0.7661 -1.5293 0.5351 -0.0029 0.0012 s.e. 0.0539 0.0682 0.0662 0.0025 0.0155
sigma^2 = 0.002059: log likelihood = 2259.17 AIC=-4506.35 AICc=-4506.28 BIC=-4475.1
ME RMSE MAE MPE MAPE MASE ACF1
Training set 0.001324029 0.04527702 0.03477904 NaN Inf 0.7847113 -0.008522662 ####
residuals_auto <- residuals(arima_covariates_noGenet)
plot(residuals_auto, main = "Residuals of AUTOSARIMA Model")
Acf(residuals_auto, main = "ACF of Residuals")
Box.test(residuals_auto, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: residuals_auto
## X-squared = 0.098276, df = 1, p-value = 0.7539
adf_test_residuals <- adf.test(residuals_auto, alternative = "stationary") #definitely stationary
qqnorm(residuals_auto)
qqline(residuals_auto)
shapiro.test(residuals_auto)
##
## Shapiro-Wilk normality test
##
## data: residuals_auto
## W = 0.99067, p-value = 1.463e-07
ggplot() +
geom_histogram(aes(x = residuals_auto), bins = 20, color = "black") +
labs(x = "Residuals",
y = "Frequency")
fitted_values <- fitted(arima_covariates_noGenet)
plot(fitted_values, residuals_auto, main="Residuals vs Fitted Values")
abline(h=0, col="red")
autoArima2 <- auto.arima(all_size_na$avg_size_diff2, seasonal = TRUE, stepwise = FALSE, approximation = FALSE)
summary(autoArima2)
## Series: all_size_na$avg_size_diff2
## ARIMA(1,0,4) with zero mean
##
## Coefficients:
## ar1 ma1 ma2 ma3 ma4
## 0.9289 -0.7029 0.0217 -0.0357 -0.1054
## s.e. 0.0412 0.0506 0.0356 0.0353 0.0303
##
## sigma^2 = 0.005237: log likelihood = 1541.35
## AIC=-3070.71 AICc=-3070.64 BIC=-3039.8
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0007083893 0.0722271 0.05541204 -20.12162 319.208 0.7668489
## ACF1
## Training set 0.000179781
AR(1) model: This is autoregressive data, and the current value of the time series depends linearly on its immediately preceding value - makes sense because body size shouldn’t change that drastically week by week. I(0): No differencing applied to make the series stationary - which makes sense because the body size data has been differenced twice which we know makes it stationary. MA(4): current value of the time series depends on the last FOUR periods’ forecast errors - means that the current value of the series depends on the errors made in the previous four periods. Greater value than first-differenced data model MA(2). Meaning??
residuals_auto2 <- residuals(autoArima2)
plot(residuals_auto2, main = "Residuals of AUTOSARIMA Model")
Acf(residuals_auto2, main = "ACF of Residuals")
Box.test(residuals_auto2, type = "Ljung-Box")
##
## Box-Ljung test
##
## data: residuals_auto2
## X-squared = 4.1307e-05, df = 1, p-value = 0.9949
adf_test_residuals2 <- adf.test(residuals_auto2, alternative = "stationary") #definitely stationary
qqnorm(residuals_auto2)
qqline(residuals_auto2)
shapiro.test(residuals_auto2)
##
## Shapiro-Wilk normality test
##
## data: residuals_auto2
## W = 0.99254, p-value = 4.801e-06
ggplot() +
geom_histogram(aes(x = residuals_auto2), bins = 20, color = "black") +
labs(x = "Residuals",
y = "Frequency")
fitted_values2 <- fitted(autoArima2)
plot(fitted_values2, residuals_auto2, main="Residuals vs Fitted Values")
abline(h=0, col="red")
Shapiro-Wilks test says residuals are close to normal distribution but
not normally distributed… but the plots look fine.
#create matrix of dummy variables incorporating all potential covariates
xreg_All <- model.matrix(~ treatment + genet + mhw - 1, data = all_size_na)
xreg_NoGenet <- model.matrix(~ treatment + mhw - 1, data = all_size_na)
xreg_NoTreatment <- model.matrix(~ genet + mhw - 1, data = all_size_na)
xreg_NoMHW <- model.matrix(~ treatment + genet - 1, data = all_size_na)
xreg_MHW <- model.matrix(~ mhw - 1, data = all_size_na)
xreg_Treatment <- model.matrix(~ treatment - 1, data = all_size_na)
xreg_Genet <- model.matrix(~ genet - 1, data = all_size_na)
arima_no_covariates2 <- Arima(all_size_na$avg_size_diff2, order = c(1, 0, 4))
summary(arima_no_covariates2)
## Series: all_size_na$avg_size_diff2
## ARIMA(1,0,4) with non-zero mean
##
## Coefficients:
## ar1 ma1 ma2 ma3 ma4 mean
## 0.9271 -0.7011 0.0222 -0.0353 -0.1049 0.0018
## s.e. 0.0425 0.0517 0.0356 0.0353 0.0304 0.0050
##
## sigma^2 = 0.005241: log likelihood = 1541.42
## AIC=-3068.84 AICc=-3068.75 BIC=-3032.79
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -3.024331e-05 0.07222344 0.05540808 167.9321 253.6381 0.7754863
## ACF1
## Training set 0.0002677154
arima_covariates_all2 <- Arima(all_size_na$avg_size_diff2, order = c(1, 0, 4), xreg = xreg_All)
summary(arima_covariates_all2)
## Series: all_size_na$avg_size_diff2
## Regression with ARIMA(1,0,4) errors
##
## Coefficients:
## ar1 ma1 ma2 ma3 ma4 intercept treatment genet
## 0.9284 -0.7025 0.0218 -0.0357 -0.1054 0.0022 -0.0006 0.0001
## s.e. 0.0416 0.0509 0.0356 0.0353 0.0304 0.0185 0.0042 0.0012
## mhw
## 0.0003
## s.e. 0.0099
##
## sigma^2 = 0.005253: log likelihood = 1541.44
## AIC=-3062.88 AICc=-3062.7 BIC=-3011.37
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -2.928559e-05 0.07222242 0.0554114 167.5572 253.8037 0.7755327
## ACF1
## Training set 0.0002797304
arima_covariates_noGenet2 <- Arima(all_size_na$avg_size_diff2, order = c(1, 0, 4), xreg = xreg_NoGenet)
summary(arima_covariates_noGenet2)
## Series: all_size_na$avg_size_diff2
## Regression with ARIMA(1,0,4) errors
##
## Coefficients:
## ar1 ma1 ma2 ma3 ma4 intercept treatment mhw
## 0.9296 -0.7038 0.0216 -0.0361 -0.1057 0.0025 -0.0006 0.0004
## s.e. 0.0407 0.0501 0.0355 0.0352 0.0303 0.0182 0.0042 0.0099
##
## sigma^2 = 0.005249: log likelihood = 1541.43
## AIC=-3064.86 AICc=-3064.72 BIC=-3018.5
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -2.762402e-05 0.07222283 0.05540972 166.8089 254.3437 0.7755092
## ACF1
## Training set 0.0003822157
arima_covariates_noTreatment2 <- Arima(all_size_na$avg_size_diff2, order = c(1, 0, 4), xreg = xreg_NoTreatment)
summary(arima_covariates_noTreatment2)
## Series: all_size_na$avg_size_diff2
## Regression with ARIMA(1,0,4) errors
##
## Coefficients:
## ar1 ma1 ma2 ma3 ma4 intercept genet mhw
## 0.9266 -0.7006 0.0223 -0.0352 -0.1047 0.0011 0.0001 0.0003
## s.e. 0.0428 0.0520 0.0356 0.0353 0.0305 0.0163 0.0012 0.0098
##
## sigma^2 = 0.005249: log likelihood = 1541.43
## AIC=-3064.85 AICc=-3064.71 BIC=-3018.5
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -6.590817e-05 0.07222308 0.05541425 168.3794 253.2696 0.7755727
## ACF1
## Training set 0.00023234
arima_covariates_noMHW2 <- Arima(all_size_na$avg_size_diff2, order = c(1, 0, 4), xreg = xreg_NoMHW)
summary(arima_covariates_noMHW2)
## Series: all_size_na$avg_size_diff2
## Regression with ARIMA(1,0,4) errors
##
## Coefficients:
## ar1 ma1 ma2 ma3 ma4 intercept treatment genet
## 0.9276 -0.7017 0.0220 -0.0354 -0.1051 0.0025 -0.0006 0.0001
## s.e. 0.0423 0.0515 0.0356 0.0353 0.0304 0.0104 0.0042 0.0012
##
## sigma^2 = 0.005249: log likelihood = 1541.44
## AIC=-3064.87 AICc=-3064.73 BIC=-3018.52
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 2.477606e-05 0.07222246 0.05540808 167.4926 253.5783 0.7754863
## ACF1
## Training set 0.0002554762
arima_covariates_Genet2 <- Arima(all_size_na$avg_size_diff2, order = c(1, 0, 4), xreg = xreg_Genet)
summary(arima_covariates_Genet2)
## Series: all_size_na$avg_size_diff2
## Regression with ARIMA(1,0,4) errors
##
## Coefficients:
## ar1 ma1 ma2 ma3 ma4 intercept genet
## 0.9272 -0.7013 0.0221 -0.0353 -0.1050 0.0014 0.0001
## s.e. 0.0424 0.0516 0.0356 0.0353 0.0304 0.0062 0.0012
##
## sigma^2 = 0.005245: log likelihood = 1541.43
## AIC=-3066.85 AICc=-3066.74 BIC=-3025.65
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -3.658259e-05 0.07222305 0.05541292 168.5765 253.4582 0.7755541
## ACF1
## Training set 0.0002772473
arima_covariates_mhw2 <- Arima(all_size_na$avg_size_diff2, order = c(1, 0, 4), xreg = xreg_MHW)
summary(arima_covariates_mhw2)
## Series: all_size_na$avg_size_diff2
## Regression with ARIMA(1,0,4) errors
##
## Coefficients:
## ar1 ma1 ma2 ma3 ma4 intercept mhw
## 0.9276 -0.7017 0.0220 -0.0355 -0.1051 0.0014 0.0002
## s.e. 0.0420 0.0513 0.0356 0.0353 0.0304 0.0159 0.0099
##
## sigma^2 = 0.005245: log likelihood = 1541.42
## AIC=-3066.84 AICc=-3066.73 BIC=-3025.64
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 1.976376e-05 0.07222341 0.05540994 167.5124 253.7161 0.7755123
## ACF1
## Training set 0.0002668085
arima_covariates_treatment2 <- Arima(all_size_na$avg_size_diff2, order = c(1, 0, 4), xreg = xreg_Treatment)
summary(arima_covariates_treatment2)
## Series: all_size_na$avg_size_diff2
## Regression with ARIMA(1,0,4) errors
##
## Coefficients:
## ar1 ma1 ma2 ma3 ma4 intercept treatment
## 0.9287 -0.7029 0.0217 -0.0357 -0.1055 0.0031 -0.0006
## s.e. 0.0414 0.0507 0.0356 0.0353 0.0303 0.0098 0.0042
##
## sigma^2 = 0.005245: log likelihood = 1541.43
## AIC=-3066.86 AICc=-3066.75 BIC=-3025.66
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -1.102096e-05 0.07222282 0.05540557 167.0637 254.1057 0.7754512
## ACF1
## Training set 0.0003115088
TreatmentxGenetxMHW model performs the worst. TreatmentxMHW, GenetxMHW, TreatmentxGenet perform second worst. No covariates performs best, Treatment alone, Genet alone, and MHW alone all perform second best. This probably means we should use no covariates in second-differenced data model?
Auto-ARIMA with Covariates model (check AR, I, MA)
xreg_Covariate2 <- model.matrix(~ treatment + genet + mhw - 1, data = all_size_na)
autoArima_Covariate2 <- auto.arima(all_size_na$avg_size_diff2, seasonal = TRUE, stepwise = FALSE, approximation = FALSE, xreg = xreg_Covariate2)
summary(autoArima_Covariate2)
## Series: all_size_na$avg_size_diff2
## Regression with ARIMA(1,0,4) errors
##
## Coefficients:
## ar1 ma1 ma2 ma3 ma4 treatment genet mhw
## 0.9280 -0.7021 0.0219 -0.0356 -0.1052 -0.0004 0.0002 0.0013
## s.e. 0.0419 0.0511 0.0356 0.0353 0.0304 0.0037 0.0012 0.0056
##
## sigma^2 = 0.005249: log likelihood = 1541.43
## AIC=-3064.86 AICc=-3064.72 BIC=-3018.5
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 1.247646e-05 0.07222285 0.05542077 -18.00826 321.5899 0.7669697
## ACF1
## Training set 0.0002889995
Same as first-differenced data, no change to AR, I, or MA with any of the potential covariates.
For second-differenced data, here is the apparent best model option #### Series: all_size_na$avg_size_diff2 ARIMA(1,0,4) with zero mean Coefficients: ar1 ma1 ma2 ma3 ma4 0.9289 -0.7029 0.0217 -0.0357 -0.1054 s.e. 0.0412 0.0506 0.0356 0.0353 0.0303
sigma^2 = 0.005237: log likelihood = 1541.35 AIC=-3070.71 AICc=-3070.64 BIC=-3039.8
Training set error measures: ME RMSE MAE MPE MAPE MASE Training set 0.0007083893 0.0722271 0.05541204 -20.12162 319.208 0.7668489 ACF1 Training set 0.000179781 ####
# Create the plots
plot_undifferenced <- ggplot(all_size_na, aes(x = date, y = avg_size_log, group=as.factor(genet))) +
geom_point(aes(color=as.factor(genet)), alpha=0.3) +
geom_smooth(aes(color=as.factor(genet)), method="lm") +
ggtitle("Undifferenced Data") +
xlab("Date") +
ylab("Log Body size (mm)") +
theme_bw()
plot_differenced <- ggplot(all_size_na, aes(x = date, y = avg_size_diff, group=as.factor(genet))) +
geom_point(aes(color=as.factor(genet)), alpha=0.3) +
geom_smooth(aes(color=as.factor(genet)), method="lm") +
ggtitle("Differenced Data") +
xlab("Date") +
ylab("Differenced Body Size (mm)") +
theme_bw()
plot_differenced2 <- ggplot(all_size_na, aes(x = date, y = avg_size_diff2, group=as.factor(genet))) +
geom_point(aes(color=as.factor(genet)), alpha=0.3) +
geom_smooth(aes(color=as.factor(genet)), method="lm") +
ggtitle("2-Differenced Data") +
xlab("Date") +
ylab("2-Differenced Body Size (mm)") +
theme_bw()
combined_plot <- grid.arrange(plot_undifferenced, plot_differenced, plot_differenced2, ncol = 3)
#ggsave(here("experiment", "figures", "differenced_comparison_genet.png"), combined_plot, width=14, height=7)
Well damn, that makes sense why genet is not a significant covariate in the differenced data models!
# Create the plots
plot_undifferenced <- ggplot(all_size_na, aes(x = date, y = avg_size_log, group=as.factor(treatment))) +
geom_point(aes(color=as.factor(treatment)), alpha=0.3) +
geom_smooth(aes(color=as.factor(treatment)), method="lm") +
ggtitle("Undifferenced Data") +
xlab("Date") +
ylab("Log Body size (mm)") +
theme_bw()
plot_differenced <- ggplot(all_size_na, aes(x = date, y = avg_size_diff, group=as.factor(treatment))) +
geom_point(aes(color=as.factor(treatment)), alpha=0.3) +
geom_smooth(aes(color=as.factor(treatment)), method="lm") +
ggtitle("Differenced Data") +
xlab("Date") +
ylab("Differenced Body Size (mm)") +
theme_bw()
plot_differenced2 <- ggplot(all_size_na, aes(x = date, y = avg_size_diff2, group=as.factor(treatment))) +
geom_point(aes(color=as.factor(treatment)), alpha=0.3) +
geom_smooth(aes(color=as.factor(treatment)), method="lm") +
ggtitle("2-Differenced Data") +
xlab("Date") +
ylab("2-Differenced Body Size (mm)") +
theme_bw()
combined_plot <- grid.arrange(plot_undifferenced, plot_differenced, plot_differenced2, ncol = 3)
#ggsave(here("experiment", "figures", "differenced_comparison_treatment.png"), combined_plot, width=14, height=7)
No difference in linear slope between three treatments
# Create the plots
plot_undifferenced <- ggplot(all_size_na, aes(x = date, y = avg_size_log, group=as.factor(mhw))) +
geom_point(aes(color=as.factor(mhw)), alpha=0.3) +
geom_smooth(aes(color=as.factor(mhw)), method="lm") +
ggtitle("Undifferenced Data") +
xlab("Date") +
ylab("Log Body size (mm)") +
theme_bw()
plot_differenced <- ggplot(all_size_na, aes(x = date, y = avg_size_diff, group=as.factor(mhw))) +
geom_point(aes(color=as.factor(mhw)), alpha=0.3) +
geom_smooth(aes(color=as.factor(mhw)), method="lm") +
ggtitle("Differenced Data") +
xlab("Date") +
ylab("Differenced Body Size (mm)") +
theme_bw()
plot_differenced2 <- ggplot(all_size_na, aes(x = date, y = avg_size_diff2, group=as.factor(mhw))) +
geom_point(aes(color=as.factor(mhw)), alpha=0.3) +
geom_smooth(aes(color=as.factor(mhw)), method="lm") +
ggtitle("2-Differenced Data") +
xlab("Date") +
ylab("2-Differenced Body Size (mm)") +
theme_bw()
combined_plot <- grid.arrange(plot_undifferenced, plot_differenced, plot_differenced2, ncol = 3)
#ggsave(here("experiment", "figures", "differenced_comparison_mhw.png"), combined_plot, width=14, height=7)
MHW slope is definitely different in undifferenced data.
So… MHW and Genet do appear different in the raw body size data. But that doesn’t show up in the Differenced data… how do I reconcile this for the ARIMA? ###
future_predictions <- forecast(autoArima, h = 50) # Forecast h periods ahead
plot(future_predictions)
Just for conceptual understanding, create a quick autoARIMA model on undifferenced data and plot forecast
autoArima_undiff <- auto.arima(all_size_na$avg_size_log, seasonal = TRUE, stepwise = FALSE, approximation = FALSE)
summary(autoArima_undiff)
## Series: all_size_na$avg_size_log
## ARIMA(5,1,0)
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5
## -0.8009 -0.7477 -0.8069 -0.5740 -0.0480
## s.e. 0.0272 0.0312 0.0301 0.0312 0.0273
##
## sigma^2 = 0.008836: log likelihood = 1276.73
## AIC=-2541.47 AICc=-2541.4 BIC=-2510.22
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.001503275 0.09379348 0.07551933 -0.4226821 4.927029 0.6617219
## ACF1
## Training set 0.001854019
future_predictions <- forecast(autoArima_undiff, h = 50) # Forecast h periods ahead
plot(future_predictions)